# `from __future__` imports have to come before every other statement, so this
# one is placed first; it makes `/` perform true division on Python 2.
from __future__ import division

import numpy as np
import pandas as pd
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated in newer IPython.
from IPython.display import HTML, IFrame, display

# Inject CSS that sets the notebook's `.container` element to 96% width.
display(HTML("""<style> .container {width:96% !important;}</style>"""))

# NOTE: the `.iplot(...)` calls further down in this file are provided by
# cufflinks, not by pandas. Re-enable the four lines below before running
# those calls, otherwise they fail with AttributeError.
# from plotly.offline import init_notebook_mode, iplot
# import cufflinks as cf
# init_notebook_mode()
# cf.go_offline()
!ls additional_data
path = 'additional_data/'
nat34 = pd.read_csv(path + 'nat03_04.csv')
nat56 = pd.read_csv(path + 'nat05_06.csv')
nat78 = pd.read_csv(path + 'nat07_08.csv')
print nat34.shape
print nat56.shape
print nat78.shape
nat34.head().T
def _default_rate(loans):
    """Return the fraction of rows in *loans* whose MIS_Status is 'CHGOFF'.

    The mean of the boolean mask equals ``mask.sum() / len(mask)`` but does
    not rely on ``from __future__ import division`` being in effect, so it
    cannot silently degrade to integer division on Python 2.
    """
    return (loans['MIS_Status'] == 'CHGOFF').mean()


# A loan counts as a default when MIS_Status is 'CHGOFF'; otherwise it is 'PIF'.
print(nat34['MIS_Status'].value_counts())

# Labelled default rate for the first extract. The label is formatted into a
# single string so print(...) behaves identically on Python 2 and Python 3.
print('Default rate = %s' % _default_rate(nat34))

# Default rate of each extract, in the order nat34, nat56, nat78.
print(_default_rate(nat34))
print(_default_rate(nat56))
print(_default_rate(nat78))
# Frequency tables for individual columns of the first extract.
# `.head()` keeps only the five most frequent values of a column.
nat34['BalanceGross'].value_counts()

# Location columns.
nat34['State'].value_counts().head()
nat34['City'].value_counts().head()
nat34['Zip'].value_counts().head()
nat34['UrbanRural'].value_counts()

# Lender and industry code.
nat34['Bank'].value_counts().head()
nat34['NAICS'].value_counts().head()

# Parse ApprovalDate in place so the `.dt` accessor can be used on it, then
# count loans per calendar month of approval.
nat34['ApprovalDate'] = pd.to_datetime(nat34['ApprovalDate'])
nat34['ApprovalDate'].dt.month.value_counts()
nat34['ApprovalFY'].value_counts().head()

# Business attributes.
nat34['NoEmp'].value_counts().head()
nat34['NewExist'].value_counts()
# Select the charged-off (MIS_Status == 'CHGOFF') loans once and reuse the
# subset for every breakdown below.
charged_off = nat34[nat34['MIS_Status'] == 'CHGOFF']

# Ten random charged-off loans, transposed for reading. The sample is not
# seeded, so it differs from run to run.
charged_off.sample(10).T

# NOTE: `.iplot` is provided by cufflinks, not pandas; the cufflinks setup at
# the top of this file is commented out and must be re-enabled for this line.
charged_off['City'].value_counts().head(10).iplot(kind = 'bar')
charged_off['Zip'].value_counts().head(10)

# First three characters of the ZIP code.
# NOTE(review): if Zip was parsed as an integer column, codes with leading
# zeros have lost them and this prefix is wrong for those rows -- confirm dtype.
zip_prefix = charged_off['Zip'].astype(str).str[:3]
zip_prefix.value_counts().head(10)

# First three characters of the city name.
city_prefix = charged_off['City'].astype(str).str[:3]
city_prefix.value_counts().head(10)
charged_off['Bank'].value_counts().head()

# Whole-extract distributions (not restricted to charged-off loans).
nat34['UrbanRural'].value_counts()
nat34['RevLineCr'].value_counts()
# Histogram of Term for rows with default == 1, then for rows with default == 0.
# NOTE(review): `train` is not defined in the visible part of this file --
# presumably it is created elsewhere; confirm before running.
# NOTE: `.iplot` is provided by cufflinks, not pandas; the cufflinks setup at
# the top of this file is commented out and must be re-enabled for these lines.
train.loc[train['default'] == 1, 'Term'].iplot(kind = 'hist')
train.loc[train['default'] == 0, 'Term'].iplot(kind = 'hist')